1 Packages required

library(methylKit)
## Warning: package 'methylKit' was built under R version 3.5.1
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 3.5.1
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 3.5.1
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colMeans, colnames, colSums, dirname, do.call, duplicated,
##     eval, evalq, Filter, Find, get, grep, grepl, intersect,
##     is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
##     paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
##     Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 3.5.1
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 3.5.1
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 3.5.1
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
library(DT)

2 Negative control

2.1 Input files

files.neg <- list.files("./input", pattern = "neg", full.names = T)

files.list.neg <- as.list(files.neg)
names(files.list.neg) <- as.character(sapply(basename(files.neg), function(x) strsplit(x, "\\.")[[1]][1]))

myobj.neg <- methRead(
  location = files.list.neg, sample.id = as.list(names(files.list.neg)),
  assembly = "hg19", pipeline = "bismarkCoverage", header = F, skip = 0,
  dbtype = "tabix", treatment = c(0,1,0,1,0,1), dbdir = "./output/methylDB"
)
## Taking input= as a system command ('gunzip -c ./input/data_neg_control_rep1.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_neg_control_rep2.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_neg_control_rep3.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_neg_control_rep4.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_neg_control_rep5.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_neg_control_rep6.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...

2.2 Histogram of CpG methylation

par(mfrow = c(3, 2), cex = 0.6, cex.axis = 1.3, cex.lab = 1.3, cex.main = 1.2)
tmp <- sapply(1:length(files.list.neg), function(x)
  getMethylationStats(myobj.neg[[x]], plot = TRUE, both.strands = FALSE))

2.3 Histogram of CpG coverage

par(mfrow = c(3, 2), cex = 0.6, cex.axis = 1.3, cex.lab = 1.3, cex.main = 1.2)
tmp <- sapply(1:length(files.list.neg), function(x)
  getCoverageStats(myobj.neg[[x]], plot = TRUE, both.strands = FALSE))

2.4 Merging samples

meth.neg <- unite(myobj.neg)
## uniting...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylBase_2b0365908e01.txt.bgz

2.5 Sample Correlation

getCorrelation(meth.neg, plot = TRUE)
##                       data_neg_control_rep1 data_neg_control_rep2
## data_neg_control_rep1             1.0000000             0.8261030
## data_neg_control_rep2             0.8261030             1.0000000
## data_neg_control_rep3             0.8172165             0.8124292
## data_neg_control_rep4             0.8159040             0.8095813
## data_neg_control_rep5             0.8169668             0.8105951
## data_neg_control_rep6             0.8272767             0.8262891
##                       data_neg_control_rep3 data_neg_control_rep4
## data_neg_control_rep1             0.8172165             0.8159040
## data_neg_control_rep2             0.8124292             0.8095813
## data_neg_control_rep3             1.0000000             0.8000120
## data_neg_control_rep4             0.8000120             1.0000000
## data_neg_control_rep5             0.8083899             0.8043958
## data_neg_control_rep6             0.8200500             0.8145591
##                       data_neg_control_rep5 data_neg_control_rep6
## data_neg_control_rep1             0.8169668             0.8272767
## data_neg_control_rep2             0.8105951             0.8262891
## data_neg_control_rep3             0.8083899             0.8200500
## data_neg_control_rep4             0.8043958             0.8145591
## data_neg_control_rep5             1.0000000             0.8135615
## data_neg_control_rep6             0.8135615             1.0000000

2.6 Clustering samples

clusterSamples(meth.neg, dist = "correlation", method = "ward", plot = TRUE)
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"

## 
## Call:
## hclust(d = d, method = HCLUST.METHODS[hclust.method])
## 
## Cluster method   : ward.D 
## Distance         : pearson 
## Number of objects: 6
hc <- clusterSamples(meth.neg, dist = "correlation", method = "ward", plot = FALSE)
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"

2.7 PCA

PCASamples(meth.neg, screeplot = TRUE)

PCASamples(meth.neg)

2.8 Differential methylation

myDiff.neg <- calculateDiffMeth(meth.neg, mc.cores = detectCores() - 1, overdispersion = "MN")
## two groups detected:
##  will calculate methylation difference as the difference of
## treatment (group: 1) - control (group: 0)
## 
## checking wether tabix file already exists:
## /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b0365908e01.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b0365908e01.txt.bgz
myDiff.neg.GR <- as(myDiff.neg,"GRanges")
myDiff.neg.GR.df <- data.frame(myDiff.neg.GR)

write.table(myDiff.neg.GR.df, file = gzfile("methylKit_negative_control.txt.gz", compression = 3),
            sep = "\t", row.names = F, quote = F)

tiles=tileMethylCounts(myobj.neg,win.size=1000,step.size=1000)
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_neg_control_rep1_tiled.txt.bgz
## tabix file already exists, renaming output file:
## ./output/methylDB/data_neg_control_rep1_tiled_1.txt.bgz
## HINT: consider using 'suffix' argument to write different function calls to different files
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_neg_control_rep2_tiled.txt.bgz
## tabix file already exists, renaming output file:
## ./output/methylDB/data_neg_control_rep2_tiled_1.txt.bgz
## HINT: consider using 'suffix' argument to write different function calls to different files
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_neg_control_rep3_tiled.txt.bgz
## tabix file already exists, renaming output file:
## ./output/methylDB/data_neg_control_rep3_tiled_1.txt.bgz
## HINT: consider using 'suffix' argument to write different function calls to different files
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_neg_control_rep4_tiled.txt.bgz
## tabix file already exists, renaming output file:
## ./output/methylDB/data_neg_control_rep4_tiled_1.txt.bgz
## HINT: consider using 'suffix' argument to write different function calls to different files
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_neg_control_rep5_tiled.txt.bgz
## tabix file already exists, renaming output file:
## ./output/methylDB/data_neg_control_rep5_tiled_1.txt.bgz
## HINT: consider using 'suffix' argument to write different function calls to different files
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_neg_control_rep6_tiled.txt.bgz
## tabix file already exists, renaming output file:
## ./output/methylDB/data_neg_control_rep6_tiled_1.txt.bgz
## HINT: consider using 'suffix' argument to write different function calls to different files
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
meth.neg.r <- unite(tiles)
## uniting...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylBase_2b03781eda37.txt.bgz
myDiff.neg.r <- calculateDiffMeth(meth.neg.r, mc.cores = detectCores() - 1, overdispersion = "MN")
## two groups detected:
##  will calculate methylation difference as the difference of
## treatment (group: 1) - control (group: 0)
## 
## checking wether tabix file already exists:
## /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b03781eda37.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b03781eda37.txt.bgz
myDiff.neg.GR.r <- as(myDiff.neg.r,"GRanges")
myDiff.neg.GR.df.r <- data.frame(myDiff.neg.GR.r)

write.table(myDiff.neg.GR.df.r, file = gzfile("methylKit_negative_control_regions.txt.gz", 
                                              compression = 3),
            sep = "\t", row.names = F, quote = F)

3 Simulated

3.1 Input files

files.sim <- list.files("./input", pattern = "sim", full.names = T)

files.list.sim <- as.list(files.sim)
names(files.list.sim) <- as.character(sapply(basename(files.sim), function(x) strsplit(x, "\\.")[[1]][1]))

myobj.sim <- methRead(
  location = files.list.sim, sample.id = as.list(names(files.list.sim)),
  assembly = "hg19", pipeline = "bismarkCoverage", header = F, skip = 0,
  dbtype = "tabix", treatment = c(0,0,0,1,1,1), dbdir = "./output/methylDB"
)
## Taking input= as a system command ('gunzip -c ./input/data_sim_rep1.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_sim_rep2.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_sim_rep3.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_sim_rep4.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_sim_rep5.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...
## Taking input= as a system command ('gunzip -c ./input/data_sim_rep6.bismark.cov.gz') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure envionment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.
## compressing the file with bgzip...
## making tabix index...

3.2 Histogram of CpG methylation

par(mfrow = c(3, 2), cex = 0.6, cex.axis = 1.3, cex.lab = 1.3, cex.main = 1.2)
tmp <- sapply(1:length(files.list.sim), function(x)
  getMethylationStats(myobj.sim[[x]], plot = TRUE, both.strands = FALSE))

3.3 Histogram of CpG coverage

par(mfrow = c(3, 2), cex = 0.6, cex.axis = 1.3, cex.lab = 1.3, cex.main = 1.2)
tmp <- sapply(1:length(files.list.sim), function(x)
  getCoverageStats(myobj.sim[[x]], plot = TRUE, both.strands = FALSE))

3.4 Merging samples

meth.sim <- unite(myobj.sim)
## uniting...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylBase_2b0319ad3cb2.txt.bgz

3.5 Sample Correlation

getCorrelation(meth.sim, plot = TRUE)
##               data_sim_rep1 data_sim_rep2 data_sim_rep3 data_sim_rep4
## data_sim_rep1     1.0000000     0.8171566     0.8168559     0.8224698
## data_sim_rep2     0.8171566     1.0000000     0.8087033     0.8083807
## data_sim_rep3     0.8168559     0.8087033     1.0000000     0.8069816
## data_sim_rep4     0.8224698     0.8083807     0.8069816     1.0000000
## data_sim_rep5     0.8124930     0.7963951     0.8010027     0.8096318
## data_sim_rep6     0.8236267     0.8162881     0.8099493     0.8263965
##               data_sim_rep5 data_sim_rep6
## data_sim_rep1     0.8124930     0.8236267
## data_sim_rep2     0.7963951     0.8162881
## data_sim_rep3     0.8010027     0.8099493
## data_sim_rep4     0.8096318     0.8263965
## data_sim_rep5     1.0000000     0.8145349
## data_sim_rep6     0.8145349     1.0000000

3.6 Clustering samples

clusterSamples(meth.sim, dist = "correlation", method = "ward", plot = TRUE)
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"

## 
## Call:
## hclust(d = d, method = HCLUST.METHODS[hclust.method])
## 
## Cluster method   : ward.D 
## Distance         : pearson 
## Number of objects: 6
hc <- clusterSamples(meth.sim, dist = "correlation", method = "ward", plot = FALSE)
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"

3.7 PCA

PCASamples(meth.sim, screeplot = TRUE)

PCASamples(meth.sim)

3.8 Differential methylation

myDiff.sim <- calculateDiffMeth(meth.sim, mc.cores = detectCores() - 1, overdispersion = "MN")
## two groups detected:
##  will calculate methylation difference as the difference of
## treatment (group: 1) - control (group: 0)
## 
## checking wether tabix file already exists:
## /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b0319ad3cb2.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b0319ad3cb2.txt.bgz
myDiff.sim.GR <- as(myDiff.sim,"GRanges")
myDiff.sim.GR.df <- data.frame(myDiff.sim.GR)

write.table(myDiff.sim.GR.df, file = gzfile("methylKit_sim_data.txt.gz", compression = 3),
            sep = "\t", row.names = F, quote = F)

tiles=tileMethylCounts(myobj.sim,win.size=1000,step.size=1000)
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_sim_rep1_tiled.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_sim_rep2_tiled.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_sim_rep3_tiled.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_sim_rep4_tiled.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_sim_rep5_tiled.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## 
## checking wether tabix file already exists:
## ./output/methylDB/data_sim_rep6_tiled.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
meth.sim.r <- unite(tiles)
## uniting...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylBase_2b037ed24458.txt.bgz
myDiff.sim.r <- calculateDiffMeth(meth.sim.r, mc.cores = detectCores() - 1, overdispersion = "MN")
## two groups detected:
##  will calculate methylation difference as the difference of
## treatment (group: 1) - control (group: 0)
## 
## checking wether tabix file already exists:
## /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b037ed24458.txt.bgz
## tabix file is new.
## continuing now ...
## compressing the file with bgzip...
## making tabix index...
## flatfile located at: /Users/tanwar/mnt/IM/DKT/courses/sta426-project-dmr-comparison/methylKit/output/methylDB/methylDiff_2b037ed24458.txt.bgz
myDiff.sim.GR.r <- as(myDiff.sim.r,"GRanges")
myDiff.sim.GR.df.r <- data.frame(myDiff.sim.GR.r)

write.table(myDiff.sim.GR.df.r, file = gzfile("methylKit_sim_data_regions.txt.gz", 
                                              compression = 3),
            sep = "\t", row.names = F, quote = F)